/*
Project:				Perry Intergenerational
Author:					Victor Ronda
Original Date:			January 21, 2022
*/



*-------------------------------------------------------------------------------*
*  Computing the Treatment Effect Estimates and p-values for Main Children      *
*-------------------------------------------------------------------------------*



*------------------------------- Preamble --------------------------------------*



clear all
set more off
set matsize 10000
set maxvar 32767, permanently
version 15.0


*** Setting environment and filepaths ***
global klmshare : env klmshare
global klmperry : env klmperry
global data /share/klmperry/TE/data
global emp /share/klmperry/multigen_revision/Empirics

*-------------------------------------------------------------------------------*

*** Load penalized logit and penalized probit programs ***
do $emp/Scripts/penalized_logit.do

*--------------------------------- Globals -------------------------------------*

global xvars iq ses male mwork
global sxvars iq ses mwork

global dvarlist1 ghealth2 nspecialedu nsusp narrested
global dvarlist2 
global dvarlist3 ntpreg
global dvarlist4 
global dvarlist5 femp  yedu ndivor


local alist1 "00"
local alist2 "18"
local alist3 "19"
local alist4 "21"
local alist5 "23"


forval i = 1/5 {
	global depvarlist`i'
	foreach dvar in ${dvarlist`i'} {
		foreach k in `alist`i'' {
			global depvarlist`i' "${depvarlist`i'} `dvar'`k'"
		}
	}
}

global depvarlist "$depvarlist1 $depvarlist2 $depvarlist3 $depvarlist4 $depvarlist5"
global depvarlist2 ghealth2 nspecialedu nsusp narrested ntpreg19 femp23 yedu23 ndivor23



use $emp/Cleaned_data/children.dta, replace



*-------------------------------------------------------------------------------*
*        Computing the Treatment Effect Estimates           					*
*-------------------------------------------------------------------------------*

program simx, rclass

*sum age
*sum id
*sum idnew

foreach depvar in $depvarlist {
cap reg `depvar'  c.cg_age2016##c.cg_age2016
cap predict temp, res
cap gen `depvar'_temp=temp
cap drop temp
}

** Create variables for the different groups (pooled (2) /male (1) /female (0))

foreach depvar in $depvarlist {
	forval j=0/1 {
cap gen `depvar'_`j' = `depvar'_temp if cgmale==`j'
cap egen `depvar'_m`j' = mean(`depvar'_`j'), by(idnew)
cap drop `depvar'_`j''
}
}

foreach depvar in $depvarlist {
cap gen `depvar'_2 = `depvar'_temp 
cap egen `depvar'_m2 = mean(`depvar'_2), by(idnew)
cap drop `depvar'_2
cap drop `depvar'_temp
}


foreach k in 00 18 19 21 23 {
	cap gen interviewed`k' = (anychild`k'a < .)
}




cap gen streat = treat if age==10


*Compute a score for propensity of receiving treatment:
cap penlogit "streat" "$xvars" "" "predict pd if age==10, pr"

*Compute a score for propensity of being interviewed given the treatment status:
forval v = 0/1 {
	foreach k in 00 18 19 21 23 {
		cap su interviewed`k' if streat==`v'
		if r(mean)==1 {
			cap gen pi`k'`v' = 1 if interviewed`k'==1 & age==10
		}
		else {
			cap penlogit "interviewed`k'" "$xvars" "streat==`v'" "predict pi`k'`v' if interviewed`k'==1 & age==10, pr"
		}
	}
}


*** Computing treatment effect estimates and t-statistics for each outcome variable ***

foreach depvar in $depvarlist {
*qui {

local k = reverse(substr(reverse("`depvar'"), 1, 2)) //for subgroup analysis of participants' children at or over age k

//locals for gender of the children of the participants
local cm0 "0"
local cm1 "1"
local cm2 "0,1"

//locals for gender of the participant
local pm0 "0"
local pm1 "1"
local pm2 "0,1"

forval j = 0/2 {
	
	local dvar "`depvar'_m`j'"
	cap gen nmiss = (`dvar' <.) //indicator of non-missing data for the dependent variable

*** For AIPW estimates, generate non-missing-ness probabilities and conditional expectations of the outcome ***	
*Compute a score for propensity of having a non-missing outcome given the treatment and interview:	
	forval v = 0/1 {
		cap su nmiss if streat==`v' & interviewed`k'==1 & age==10
		if r(mean) == 1{
			cap gen pn`v' = 1 if nmiss==1 & streat==`v' & age==10
		}
		else {
			cap penlogit "nmiss" "$xvars" "streat==`v' & interviewed`k'==1" "predict pn`v' if nmiss==1 & streat==`v' & age==10, pr"
		}
	}

*Inverse probability weights		
cap gen a1 = streat/(pn1 * pi`k'1 * pd) if nmiss==1 & streat==1 & age==10
cap replace a1 = 0 if nmiss==1 & streat==0 & age==10
cap gen a0 = (1 - streat)/(pn0 * pi`k'0 * (1 - pd)) if nmiss==1 & streat==0 & age==10
cap replace a0 = 0 if nmiss==1 & streat==1 & age==10
cap gen a = a1 + a0

*OLS-based conditional expectations of the outcome to use in the AIPW estimator
local pm0 "0"
local pm1 "1"
forval i = 0/1 {
	forval v = 0/1 {
		cap count if inlist(male,`pm`i'') & `dvar' <. & streat==`v' & nmiss==1 & age==10
		cap local count_obs = r(N)
		if `count_obs' > 3 {
			cap reg `dvar' $sxvars if inlist(male,`pm`i'') & nmiss==1 & streat==`v' & age==10
			cap predict `dvar'_aipw_`v'_`i' if inlist(male,`pm`i'') & inlist(nmiss,0,1) & age==10
		}
		else if `count_obs' >= 1 & `count_obs' <= 3 {
			 cap su `dvar' if inlist(male,`pm`i'') & nmiss==1 & streat==`v' & age==10
			 cap gen `dvar'_aipw_`v'_`i' = r(mean) if inlist(male,`pm`i'') & inlist(nmiss,0,1) & age==10
		}
		else {
			cap gen `dvar'_aipw_`v'_`i' = . if inlist(male,`pm`i'') & inlist(nmiss,0,1) & age==10
		}
		
	}
}
cap gen `dvar'_aipw_1 = `dvar'_aipw_1_1 if male==1 & age==10
cap replace `dvar'_aipw_1 = `dvar'_aipw_1_0 if male==0 & age==10
cap gen `dvar'_aipw_0 = `dvar'_aipw_0_1 if male==1 & age==10
cap replace `dvar'_aipw_0 = `dvar'_aipw_0_0 if male==0 & age==10

*AIPW estimator at the individual level, which will be averaged later
cap gen te_aipw = (`dvar'_aipw_1 + a1 * (`dvar' - `dvar'_aipw_1)) - (`dvar'_aipw_0 + a0 * (`dvar' - `dvar'_aipw_0)) if nmiss==1 & age==10
cap replace te_aipw = `dvar'_aipw_1 - `dvar'_aipw_0 if nmiss==0 & age==10
//locals for gender of the participant
local pm0 "0"
local pm1 "1"
local pm2 "0,1"
forval i = 0/2 {

	cap su `dvar' if inlist(male,`pm`i'')  & age==10
	cap return scalar `depvar'_nmisscount_`j'`i' = r(N)
	 
	cap su `dvar' if inlist(male,`pm`i'') & streat==0 & age==10
	cap return scalar `depvar'_cmean_`j'`i' = r(mean)
	
	cap su `dvar' if inlist(male,`pm`i'') & streat==1 & age==10
	cap return scalar `depvar'_tmean_`j'`i' = r(mean)
	
*** AIPW (augmented inverse probability weighting-based treatment effect estimate) ***

	cap reg te_aipw if inlist(male,`pm`i'') & age==10, cluster(sid)
	cap return scalar `depvar'_aipwd_`j'`i' = _b[_cons]
	if _rc==0 {
		if _b[_cons]==0 {
			cap return scalar `depvar'_aipwt_`j'`i' = 0
		}
		else {
			cap return scalar `depvar'_aipwt_`j'`i' = _b[_cons]/_se[_cons]
		}
	}



*** UDIM (unconditional difference in means) ***	
	
	cap su streat if inlist(male,`pm`i'') & nmiss==1 & age==10
	if r(mean) > 0 & r(mean) < 1 {
		cap reg `dvar' streat if inlist(male,`pm`i'') & nmiss==1 & age==10, cluster(sid)
		cap return scalar `depvar'_udimd_`j'`i' = _b[streat]
		if _rc==0 {
			if _b[streat]==0 {
				cap return scalar `depvar'_udimt_`j'`i' = 0
			}
			else {
				cap return scalar `depvar'_udimt_`j'`i' = _b[streat]/_se[streat]
			}
		}
	}
	
*** COLS (conditional ordinary least squares-based treatment effect estimate) ***
	
	cap su streat if inlist(male,`pm`i'') & nmiss==1 & age==10
	if r(mean) > 0 & r(mean) < 1 {
		cap reg `dvar' streat $xvars if inlist(male,`pm`i'') & nmiss==1 & age==10, cluster(sid)
		cap return scalar `depvar'_colsd_`j'`i' = _b[streat]
		if _rc==0 {
			if _b[streat]==0 {
				cap return scalar `depvar'_colst_`j'`i' = 0
			}
			else {
				cap return scalar `depvar'_colst_`j'`i' = _b[streat]/_se[streat]
			}
		}
	}
	
	

		cap leebounds `dvar' streat if inlist(male,`pm`i'') if  age==10, select(anychild`k'a)
		cap mat leetable = r(table)
		if _rc==0 {
			cap return scalar `depvar'_lblld_`j'`i' = leetable[1,1]
			if leetable[1,1]==0 {
				cap return scalar `depvar'_lbllt_`j'`i' = 0
			}
			else {
				cap return scalar `depvar'_lbllt_`j'`i' = leetable[1,1]/leetable[2,1]
			}
			
			cap return scalar `depvar'_lbuld_`j'`i' = leetable[1,2]
			if leetable[1,2]==0 {
				cap return scalar `depvar'_lbult_`j'`i' = 0
			}
			else {
				cap return scalar `depvar'_lbult_`j'`i' = leetable[1,2]/leetable[2,2]
			}
		}	
	
}

foreach tempovar in nmiss pn0 pn1 a1 a0 a `dvar'_aipw_1_1 `dvar'_aipw_1_0 `dvar'_aipw_0_1 `dvar'_aipw_0_0 `dvar'_aipw_1 `dvar'_aipw_0 te_aipw {
	qui cap drop `tempovar'
}

}
	
*}
}

foreach tempovar in pd pi* *_m0 *_m1 *_m2 interviewed* {
	qui cap drop `tempovar'
}


*sum streat
cap drop streat 

end

global storeresults ""
foreach depvar in $depvarlist {
	forval j = 0/2 {
		forval i = 0/2 {
			foreach result in nmisscount cmean tmean udimd udimt colsd colst aipwd aipwt lblld lbllt lbuld lbult {
				global storeresults "$storeresults `depvar'_`result'_`j'`i' = r(`depvar'_`result'_`j'`i')"
			}
		}
	}
}



*** Main Estimates ***
simulate $storeresults, seed(12345) reps(1) noisily saving($emp/Results/estimates_children.dta, replace every(1)): simx
*** Estimates for Permutation Tests***
permute treat $storeresults, seed(12345) reps(5000) saving($emp/Results/estimates_perm_children.dta, replace every(20)): simx
*** Estimates for Bootstrap Tests***
bootstrap $storeresults, seed(12345) cluster(id) idcluster(idnew) nodrop reps(10000) saving($emp/Results/estimates_boot_children.dta, replace every(20)): simx


*-------------------------------------------------------------------------------*
*                         Computing Asymptotic P-Values                         *
*-------------------------------------------------------------------------------*

use $emp/Results/estimates_children.dta, clear
drop *_nmisscount_* *_cmean_* *_tmean_*

foreach dvar in $depvarlist {
foreach estm in udim cols aipw {
forval j = 0/2 {
forval i = 0/2 {

qui {

*Computing the asymptotic p-value based on asymptotic standard error
local aapval = normal(-abs(`dvar'_`estm't_`j'`i'[1]))

*Computing the asymptotic p-value based on bootstrap standard error
su `dvar'_`estm'd_`j'`i'
local abpval = normal(-abs(`dvar'_`estm'd_`j'`i'[1]/r(sd)))
if normal(-abs(`dvar'_`estm'd_`j'`i'[1]/r(sd)))==. {
local abpval = `aapval'
}

replace `dvar'_`estm'd_`j'`i' = `aapval' if _n==1
replace `dvar'_`estm't_`j'`i' = `abpval' if _n==1

}

}
}
}
}

keep if _n==1

save $emp/Results/asym_pval_children.dta, replace

*-------------------------------------------------------------------------------*
*                         Computing Permutation P-Values                         *
*-------------------------------------------------------------------------------*
use $emp/Results/estimates_children.dta, clear
append using $emp/Results/estimates_perm_children.dta
forval j = 0/2 {
forval i = 0/2 {
gen C_aipwd_`j'`i'=0
}
}
gen K=0


foreach dvar in $depvarlist {
forval j = 0/2 {
forval i = 0/2 {

qui {
replace C_aipwd_`j'`i'=C_aipwd_`j'`i'+1 if `dvar'_aipwd_`j'`i' >= 0


count if _n > 1 & `dvar'_aipwd_`j'`i' <.
local bootB = r(N)
count if _n > 1 & `dvar'_aipwd_`j'`i' >= `dvar'_aipwd_`j'`i'[1]
local p_r = (r(N) + 1)/`bootB'
replace `dvar'_aipwd_`j'`i' =  `p_r' if _n==1

}
}
}
}


forval j = 0/2 {
forval i = 0/2 {

qui {

count if _n > 1 & C_aipwd_`j'`i' <.
local bootB = r(N)
count if _n > 1 & C_aipwd_`j'`i' >= C_aipwd_`j'`i'[1]
local p_r = (r(N) + 1)/`bootB'

replace C_aipwd_`j'`i' =  `p_r' if _n==1
}
}
}

keep if _n==1


save $emp/Results/perm_pval_children.dta, replace

*-------------------------------------------------------------------------------*
*                         Computing Bootstrap P-Values                         *
*-------------------------------------------------------------------------------*
use $emp/Results/estimates_children.dta, clear
append using $emp/Results/estimates_boot_children.dta

foreach dvar in $depvarlist {
foreach estm in udim cols aipw {
forval j = 0/2 {
forval i = 0/2 {
egen temp=sd(`dvar'_`estm'd_`j'`i') if _n>1
egen `dvar'_`estm'se_`j'`i'=mean(temp)
drop temp
}
}
}
}

foreach dvar in $depvarlist {
foreach estm in aipw {
forval j = 0/2 {
forval i = 0/2 {

qui {


*Computing the bootstrap p-value (type II p-value) based on unstudentized test statistic

count if _n > 1 & `dvar'_`estm'd_`j'`i' <.
local bootB = r(N)
count if _n > 1 & `dvar'_`estm'd_`j'`i' >= 0
local p_r = (r(N) + 1)/`bootB'
count if _n > 1 & `dvar'_`estm'd_`j'`i' <= 0
local p_l = (r(N) + 1)/`bootB'

*Computing the bootstrap p-value (percentile-t p-value) based on studentized test statistic

count if _n > 1 & ( (`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1])/(`dvar'_`estm'd_`j'`i'/`dvar'_`estm't_`j'`i') >= `dvar'_`estm't_`j'`i'[1] | ((`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1])==0 & (`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1]) >= `dvar'_`estm't_`j'`i'[1] & `dvar'_`estm'd_`j'`i' <.) )
local q_r = (r(N) + 1)/`bootB'
count if _n > 1 & ( (`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1])/(`dvar'_`estm'd_`j'`i'/`dvar'_`estm't_`j'`i') <= `dvar'_`estm't_`j'`i'[1] | ((`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1])==0 & (`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1]) <= `dvar'_`estm't_`j'`i'[1] & `dvar'_`estm'd_`j'`i' <.) )
local q_l = (r(N) + 1)/`bootB'

replace `dvar'_`estm'd_`j'`i' =  min(`p_l',`p_r',1) if _n==1
replace `dvar'_`estm't_`j'`i' =  min(`q_l',`q_r',1) if _n==1

}

}
}
}
}


forval j = 0/2 {
forval i = 0/2 {
gen C_aipwd_`j'`i'=0
}
}
gen K=0
foreach dvar in $depvarlist {
replace K=K+1
forval j = 0/2 {
forval i = 0/2 {
replace C_aipwd_`j'`i'=C_aipwd_`j'`i'+1 if `dvar'_aipwd_`j'`i' >= 0
}
}
}
forval j = 0/2 {
forval i = 0/2 {
replace C_aipwd_`j'`i'=C_aipwd_`j'`i'/K
count if _n > 1 & C_aipwd_`j'`i' <.
local bootB = r(N)
count if _n > 1 & C_aipwd_`j'`i' <= 1/2
local p_r = (r(N) + 1)/`bootB'
replace C_aipwd_`j'`i' =  `p_r' if _n==1
}
}

forval j = 0/2 {
forval i = 0/2 {
gen C10_aipwd_`j'`i'=0
foreach dvar in $depvarlist {
forval k = 1/100 {
count if _n>(`k'-1)*100+1 & _n<=(`k')*100  & `dvar'_aipwd_`j'`i' <= 0 
local p_r = (r(N) + 1)/100
replace C10_aipwd_`j'`i'=C10_aipwd_`j'`i'+1 if `p_r'<=0.1 & _n==`k'
}
}
count if _n<=100 & C10_aipwd_`j'`i'<K/10
local p_r=(r(N))/100
replace C10_aipwd_`j'`i'=`p_r' if _n==1
}
}


keep if _n==1

save $emp/Results/boot_pval_children.dta, replace

